##########################################################################################################
##########################################################################################################
####Non-state Atrocities in Capital Cities - CEM Matching Analysis: Urban Cells in Afflicted Countries####
##########################################################################################################
##########################################################################################################
library(plyr)
library(cem)
library(foreign)
library(doBy)
library(plyr)
library(mvtnorm)

##############################
###Create Data for Matching###
##############################
# Set working library
setwd("~/Data/Global Analysis/")
# Read in main data
main.data <- read.dta("full.grid.upd.dta")
###Subset data for only countries that experienced at least one insurgent atrocity
#Summarize insurgent atrocities by country
at.count <- summaryBy(incidentnonstatefull ~ ccode, FUN=c("sum"), data=main.data, na.rm=TRUE)
names(at.count) <- c("ccode", "incidentnonstatefull.sum.count")
#Join with main dataset
at.count.join <- join(main.data, at.count)
#Subset only grid-cell years in countries that experienced at least one insurgent atrocity within the period of analysis
main.data.at <- subset(at.count.join,  at.count.join$incidentnonstatefull.sum.count>0)
##Finally, subset only urban areas within this sample
main.data.at.urb <- subset(main.data.at, lagurban>0)
#Next, keep the variables used for matching and remove NAs
prop.at.urb<-na.omit(main.data.at.urb[,c("NSAtBin", "lag_Capital", "loglagttime", "loglagppp", "loglagbdist1", "loglagpop")])
#Create a binary dependent variable denoting whether a given grid cell experienced at least on insurgent atrocity or not in a given year
prop.at.urb$NSAtBin <- ifelse(prop.at.urb$NSAtBin>0,1,0)
#Make sure both the treatment and DV are integers
prop.at.urb$NSAtBin <- as.integer(prop.at.urb$NSAtBin)
prop.at.urb$lag_Capital <- as.integer(prop.at.urb$lag_Capital)

######################
###Cell Year Sample###
######################
###Standard CEM (without extrapolation)
#Create a matrix
mat.at.urb <- cem(treatment="lag_Capital", data=prop.at.urb, keep.all = TRUE, drop="NSAtBin")
mat.at.urb
#Regress on treatment
log.t.at.urb <-  att(mat.at.urb, NSAtBin~lag_Capital, data=prop.at.urb, model='logit', extrapolate = FALSE)
summary(log.t.at.urb)
#Plot the results
pdf("cematurb.pdf")
plot(log.t.at.urb, mat.at.urb, prop.at.urb, vars=c("lagppp","loglagpop","lagttime","lagbdist1"))
dev.off()

###CEM with extrapolation
#Regress on treatment
log.t.e.at.urb <-  att(mat.at.urb, NSAtBin~lag_Capital, data=prop.at.urb, model='logit', extrapolate = TRUE)
summary(log.t.e.at.urb)
#Plot the results
pdf("cemextaturb.pdf")
plot(log.t.e.at.urb, mat.at.urb, prop.at.urb, vars=c("lagppp","loglagpop","lagttime","lagbdist1"))
dev.off()

#####################################################
###Values Averged by Cell for the 1996-2009 Period###
#####################################################
#collapse
prop.i <- summaryBy(NSAtBin+lag_Capital+loglagppp+loglagttime+loglagbdist1+loglagpop ~ gid, FUN=c("mean"), data=main.data.at.urb, na.rm=TRUE)
names(prop.i) <- c("gid", "NSAtBin", "lag_Capital", "loglagttime","lagppp", "lagbdist1", "loglagpop")
#Remove NAs
prop.i <-na.omit(prop.i)
#Re-code the DV and TV as binary
prop.i$lag_Capital <- as.numeric(ifelse(prop.i$lag_Capital>0,1,0))
prop.i$NSAtBin <- as.numeric(ifelse(prop.i$NSAtBin>0,1,0))
#Remove the GID indicator from dataset
prop.i$gid <- NULL

###Standard CEM (without extrapolation)
#Create a matrix
matg.urb <- cem(treatment="lag_Capital", data=prop.i, keep.all = TRUE, drop="NSAtBin")
matg.urb
#Regress on treatment
log.t.g.urb <-  att(matg.urb, NSAtBin~lag_Capital, data=prop.i, model='logit', extrapolate = FALSE)
summary(log.t.g.urb)
#Plot the results
pdf("cemgidaturb.pdf")
plot(log.t.g.urb, matg.urb, prop.i, vars=c("lagppp","loglagpop","lagttime","lagbdist1"))
dev.off()

#CEM with extraploation
#Regress on treatment
log.t.g.e.urb <-  att(matg.urb, NSAtBin~lag_Capital, data=prop.i, model='logit', extrapolate = TRUE)
summary(log.t.g.e.urb)
#Plot the results
pdf("cemgidextaturb.pdf")
plot(log.t.g.e.urb, matg.urb, prop.i, vars=c("lagppp","loglagpop","lagttime","lagbdist1"))
dev.off()